The data set contains information about three species of Iris flowers, namely Iris-setosa, Iris-versicolor and Iris-virginica.
Four features are collected from each sample: sepal length, sepal width, petal length and petal width, all measured in centimeters.
# Common imports
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import plotly.express as px
# Load the gzip-compressed Iris CSV into a DataFrame.
# NOTE(review): the recorded preview shows an "Unnamed: 0" column that mirrors
# the row index, so the file presumably stores a saved index column - confirm
# before switching to index_col=0.
iris_df = pd.read_csv(
    "./data/IRIS.csv.gz",
    compression="gzip",
)

# Preview five randomly chosen rows.
iris_df.sample(n=5)
| | Unnamed: 0 | sepal_length | sepal_width | petal_length | petal_width | species |
|---|---|---|---|---|---|---|
| 144 | 144 | 6.7 | 3.3 | 5.7 | 2.5 | Iris-virginica |
| 48 | 48 | 5.3 | 3.7 | 1.5 | 0.2 | Iris-setosa |
| 52 | 52 | 6.9 | 3.1 | 4.9 | 1.5 | Iris-versicolor |
| 23 | 23 | 5.1 | 3.3 | 1.7 | 0.5 | Iris-setosa |
| 78 | 78 | 6.0 | 2.9 | 4.5 | 1.5 | Iris-versicolor |
# 3D scatter of sepal length, sepal width and petal width, coloured by species.
fig = px.scatter_3d(
    iris_df,
    x="sepal_length",
    y="sepal_width",
    z="petal_width",
    color="species",
    template="plotly_dark",
)

# Set all four layout margins (right, top, left, bottom) to zero.
fig.update_layout(margin=dict(r=0, t=0, l=0, b=0))
fig.show()
# Load the gzip-compressed OWID COVID-19 CSV into a DataFrame.
# NOTE(review): as with the Iris file, the recorded preview shows an
# "Unnamed: 0" column duplicating the row index - confirm whether the CSV
# carries a saved index column.
covid_df = pd.read_csv(
    "./data/owid-covid-data.csv.gz",
    compression="gzip",
)

# Preview five randomly chosen rows.
covid_df.sample(n=5)
| | Unnamed: 0 | iso_code | continent | location | date | total_cases | new_cases | new_cases_smoothed | total_deaths | new_deaths | ... | female_smokers | male_smokers | handwashing_facilities | hospital_beds_per_thousand | life_expectancy | human_development_index | excess_mortality_cumulative_absolute | excess_mortality_cumulative | excess_mortality | excess_mortality_cumulative_per_million |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 10853 | 10853 | AZE | Asia | Azerbaijan | 2021-05-24 | 332610.0 | 156.0 | 334.429 | 4867.0 | 7.0 | ... | 0.3 | 42.5 | 83.241 | 4.70 | 73.00 | 0.756 | NaN | NaN | NaN | NaN |
| 100174 | 100174 | MCO | Europe | Monaco | 2021-03-14 | 2107.0 | 1.0 | 12.000 | 27.0 | 0.0 | ... | NaN | NaN | NaN | 13.80 | 86.75 | NaN | NaN | NaN | NaN | NaN |
| 106753 | 106753 | NLD | Europe | Netherlands | 2020-06-14 | 49025.0 | 165.0 | 172.714 | 6074.0 | 2.0 | ... | 24.4 | 27.3 | NaN | 3.32 | 82.28 | 0.944 | 6192.8 | 8.28 | -0.81 | 360.610616 |
| 57792 | 57792 | DEU | Europe | Germany | 2021-01-18 | 2059382.0 | 9253.0 | 16895.143 | 47263.0 | 362.0 | ... | 28.2 | 33.1 | NaN | 8.00 | 81.33 | 0.947 | NaN | NaN | NaN | NaN |
| 127086 | 127086 | SPM | North America | Saint Pierre and Miquelon | 2020-05-16 | 1.0 | 0.0 | 0.000 | NaN | NaN | ... | NaN | NaN | NaN | NaN | 81.07 | NaN | NaN | NaN | NaN | NaN |
5 rows × 68 columns
# Columns a row must have a value for in order to be kept.
required_columns = [
    "iso_code",
    "location",
    "continent",
    "date",
    "new_cases_smoothed",
    "total_cases",
]

# Drop rows missing any required column, then sort by the "date" column
# (the preview shows ISO yyyy-mm-dd strings, so lexical order is chronological).
covid_df = covid_df.dropna(subset=required_columns).sort_values(by="date")

# Spot-check eight random (iso_code, location) pairs.
covid_df.loc[:, ["iso_code", "location"]].sample(n=8)
| | iso_code | location |
|---|---|---|
| 108579 | NZL | New Zealand |
| 44672 | EGY | Egypt |
| 113531 | NOR | Norway |
| 160272 | VUT | Vanuatu |
| 28832 | CYM | Cayman Islands |
| 5829 | ATG | Antigua and Barbuda |
| 162045 | VNM | Vietnam |
| 51780 | FLK | Falkland Islands |
# Single-day snapshot: keep only the rows whose date is 2021-12-23.
covid_day_df = covid_df.loc[covid_df["date"] == "2021-12-23"]

# Preview five randomly chosen rows of the snapshot.
covid_day_df.sample(n=5)
| | Unnamed: 0 | iso_code | continent | location | date | total_cases | new_cases | new_cases_smoothed | total_deaths | new_deaths | ... | female_smokers | male_smokers | handwashing_facilities | hospital_beds_per_thousand | life_expectancy | human_development_index | excess_mortality_cumulative_absolute | excess_mortality_cumulative | excess_mortality | excess_mortality_cumulative_per_million |
|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|---|
| 6568 | 6568 | ARG | South America | Argentina | 2021-12-23 | 5428957.0 | 13456.0 | 7473.571 | 116979.0 | 15.0 | ... | 16.2 | 27.7 | NaN | 5.00 | 76.67 | 0.845 | NaN | NaN | NaN | NaN |
| 79511 | 79511 | KAZ | Asia | Kazakhstan | 2021-12-23 | 1068899.0 | 536.0 | 480.143 | 18160.0 | 15.0 | ... | 7.0 | 43.1 | 98.999 | 6.70 | 73.60 | 0.825 | NaN | NaN | NaN | NaN |
| 164594 | 164594 | YEM | Asia | Yemen | 2021-12-23 | 10105.0 | 2.0 | 2.714 | 1981.0 | 0.0 | ... | 7.6 | 29.2 | 49.542 | 0.70 | 66.12 | 0.470 | NaN | NaN | NaN | NaN |
| 75445 | 75445 | ISR | Asia | Israel | 2021-12-23 | 1360912.0 | 1745.0 | 1090.143 | 8241.0 | 2.0 | ... | 15.4 | 35.4 | NaN | 2.99 | 82.97 | 0.919 | NaN | NaN | NaN | NaN |
| 16183 | 16183 | BLZ | North America | Belize | 2021-12-23 | 31444.0 | 57.0 | 32.429 | 592.0 | 1.0 | ... | NaN | NaN | 90.083 | 1.30 | 74.62 | 0.716 | NaN | NaN | NaN | NaN |
5 rows × 68 columns
# World map for the single-day snapshot: one marker per iso_code, coloured by
# continent and sized by the smoothed new-case count.
fig = px.scatter_geo(
    covid_day_df,
    locations="iso_code",
    color="continent",
    hover_name="location",
    size="new_cases_smoothed",
    projection="natural earth",
    template="plotly_dark",
)
fig.show()
# Animated world map over the full (date-sorted) data: one animation frame per
# distinct date, markers coloured by continent and sized by cumulative cases.
fig = px.scatter_geo(
    covid_df,
    locations="iso_code",
    color="continent",
    hover_name="location",
    size="total_cases",
    projection="natural earth",
    animation_frame="date",
    template="plotly_dark",
)
fig.show()
import torchvision
import os
import matplotlib.pyplot as plt
from matplotlib import rc
from matplotlib.animation import FuncAnimation
from matplotlib import animation
# Select the "jshtml" HTML representation for matplotlib animations.
plt.rcParams["animation.html"] = "jshtml"

frn = 10  # Number of frames to process in the animation
fps = 0.5  # Frames per second (0.5 means one frame every two seconds)

# Pillow-based writer, used when the animation is exported to a GIF.
mywriter = animation.PillowWriter(fps=fps)
# MNIST training split, stored under data/mnist (downloaded when missing);
# every image is converted to a tensor by the ToTensor transform.
mnist_dataset = torchvision.datasets.MNIST(
    root="data/mnist",
    train=True,
    download=True,
    transform=torchvision.transforms.ToTensor(),
)

# Figure and axes that the animation callback redraws on every frame.
fig, ax = plt.subplots(figsize=(10, 10))
def change_plot(frame_idx):
    """Redraw the shared axes with MNIST sample ``frame_idx`` and its pixel values.

    Intended as the per-frame callback passed to ``FuncAnimation``. Reads the
    module-level ``mnist_dataset``, ``fig`` and ``ax``.

    Args:
        frame_idx: Index into ``mnist_dataset`` of the sample to draw.
    """
    # Start every frame from an empty axes.
    ax.cla()

    # Element 0 of a dataset item is the image tensor; its first channel is
    # rescaled by 255 so intensities can be shown as integers.
    image_tensor = mnist_dataset[frame_idx][0]
    image_tensor_gray = image_tensor[0] * 255
    ax.matshow(image_tensor_gray, cmap="gray")

    # matshow draws rows along y and columns along x, so the label for pixel
    # [row][col] goes at (x=col, y=row). Iterating rows and columns explicitly
    # keeps this correct for non-square images as well.
    for row, pixel_row in enumerate(image_tensor_gray.tolist()):
        for col, value in enumerate(pixel_row):
            ax.text(
                col,
                row,
                # Round rather than truncate: the float rescaling may land
                # just below the intended integer intensity.
                str(round(value)),
                va="center",
                ha="center",
                color="blue",
                fontsize="small",
            )

    ax.axis("off")

    # Lay out the animation's own figure. plt.tight_layout() would act on
    # pyplot's *current* figure, which is a fresh empty one once the original
    # has been closed with plt.close().
    fig.tight_layout()
# Call change_plot once per frame; an integer frame count makes matplotlib
# pass 0 .. frn - 1. The interval between frames is given in milliseconds.
anim = FuncAnimation(fig, change_plot, frames=frn, interval=1000 / fps)

# Close the current pyplot figure (presumably so that only the animation
# widget is displayed, not an additional static figure).
plt.close()

anim
<Figure size 432x288 with 0 Axes>
# Export the animation as an animated GIF using the Pillow writer.
mywriter = animation.PillowWriter(fps=fps)

# os.makedirs (the original called the non-existent os.makedir) also creates
# the intermediate "./assets" directory; exist_ok=True makes the call a no-op
# when the target directory is already present.
os.makedirs("./assets/gif", exist_ok=True)

anim.save("./assets/gif/mnist.gif", writer=mywriter)
<Figure size 432x288 with 0 Axes>